In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
In [2]:
# Load the country-level dataset from the CSV file in the working directory.
csv_path = "country_wise_latest.csv"
df = pd.read_csv(csv_path)
In [3]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape
Out[3]:
(187, 15)
In [5]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()
Out[5]:
Country/Region Confirmed Deaths Recovered Active New cases New deaths New recovered Deaths / 100 Cases Recovered / 100 Cases Deaths / 100 Recovered Confirmed last week 1 week change 1 week % increase WHO Region
0 Afghanistan 36263 1269 25198 9796 106 10 18 3.50 69.49 5.04 35526 737 2.07 Eastern Mediterranean
1 Albania 4880 144 2745 1991 117 6 63 2.95 56.25 5.25 4171 709 17.00 Europe
2 Algeria 27973 1163 18837 7973 616 8 749 4.16 67.34 6.17 23691 4282 18.07 Africa
3 Andorra 907 52 803 52 10 0 0 5.73 88.53 6.48 884 23 2.60 Europe
4 Angola 950 41 242 667 18 1 0 4.32 25.47 16.94 749 201 26.84 Africa
In [6]:
# Count the missing values in every column.
df.isna().sum()
Out[6]:
Country/Region            0
Confirmed                 0
Deaths                    0
Recovered                 0
Active                    0
New cases                 0
New deaths                0
New recovered             0
Deaths / 100 Cases        0
Recovered / 100 Cases     0
Deaths / 100 Recovered    0
Confirmed last week       0
1 week change             0
1 week % increase         0
WHO Region                0
dtype: int64
In [7]:
# List the column labels; several contain spaces and slashes, so they must be quoted exactly when selected.
df.columns
Out[7]:
Index(['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Active',
       'New cases', 'New deaths', 'New recovered', 'Deaths / 100 Cases',
       'Recovered / 100 Cases', 'Deaths / 100 Recovered',
       'Confirmed last week', '1 week change', '1 week % increase',
       'WHO Region'],
      dtype='object')
In [26]:
# Bar chart of total confirmed cases per WHO region, largest region first.
fig, ax = plt.subplots(figsize=(10, 5))
confirmed_by_region = (
    df.groupby('WHO Region')['Confirmed']
    .sum()
    .sort_values(ascending=False)
)
# rot=50 rotates the x tick labels (the region names) by 50 degrees.
confirmed_by_region.plot(kind='bar', ax=ax, rot=50)
ax.set_title("Total confirmed cases by region")  # title previously misspelled "confiremd"
ax.set_xlabel('Region')
ax.set_ylabel('Total confirmed cases')
ax.grid(axis='y')  # horizontal guide lines only
plt.show()
In [29]:
# Bar chart of the ten countries with the most active cases.
fig, ax = plt.subplots(figsize=(10, 5))
active_by_country = df.groupby('Country/Region')['Active'].sum()
top_active = active_by_country.sort_values(ascending=False).head(10)
# rot=50 rotates the country names on the x axis by 50 degrees.
top_active.plot(kind='bar', ax=ax, rot=50)
ax.set(
    title="Top 10 countries with highest active cases",
    xlabel='Countries',
    ylabel='Active',
)
ax.grid(True)
plt.show()
In [59]:
# Heatmap of the pairwise correlation between the four headline counts.
count_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active']
correlation = df[count_columns].corr()

fig, ax = plt.subplots(figsize=(10, 5))
# Pin the colour scale to the full correlation range [-1, 1]. With automatic
# scaling the neutral midpoint of the diverging 'coolwarm' map lands on
# whatever the middle of the observed values is, not on zero correlation.
heatmap = ax.imshow(
    correlation,
    cmap='coolwarm',
    interpolation='nearest',
    vmin=-1,
    vmax=1,
)
fig.colorbar(heatmap, ax=ax)

# One tick per column, labelled with the column name on both axes.
tick_positions = list(range(len(correlation.columns)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(correlation.columns, rotation=45)
ax.set_yticks(tick_positions)
ax.set_yticklabels(correlation.columns)
ax.set_title("Correlation between case counts")
plt.show()
In [60]:
# Deaths as a percentage of confirmed cases, stored as a new column on df.
df['fatality_rate'] = df['Deaths'].div(df['Confirmed']).mul(100)
In [61]:
# Recoveries as a percentage of confirmed cases, stored as a new column on df.
df['recovery_rate'] = df['Recovered'].div(df['Confirmed']).mul(100)
In [67]:
# Scatter of fatality rate against recovery rate, one point per row of df.
# Both columns are percentages of confirmed cases, so the axis labels say so.
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(data=df, x='fatality_rate', y='recovery_rate', ax=ax)
ax.set_title("Fatality rate vs recovery rate")
ax.set_xlabel('Fatality rate (% of confirmed cases)')
ax.set_ylabel('Recovery rate (% of confirmed cases)')
ax.grid(True)
plt.show()
In [72]:
# Pie chart of new cases for the ten countries reporting the most new cases.
# Each wedge is that country's share of the top-10 total, not of the world total.
fig, ax = plt.subplots(figsize=(15, 5))
top_new_cases = (
    df.groupby('Country/Region')['New cases']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
# autopct prints each wedge's percentage, matching the deaths pie chart in this notebook.
top_new_cases.plot(kind='pie', ax=ax, autopct='%0.2f%%')
ax.set_title("Top 10 countries with highest new cases")
ax.set_ylabel('New cases')
# Grid lines and x tick rotation are not meaningful on a pie chart, and the
# wedges are already labelled by country, so those calls and the x label are omitted.
plt.show()
In [73]:
# Scatter of total deaths against total recoveries, one point per row of df.
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(data=df, x='Deaths', y='Recovered', ax=ax)
ax.set(title="Deaths vs Recovered ", xlabel='Deaths', ylabel='Recovered')
ax.grid(True)
plt.xticks()  # called without arguments: only reads the current ticks, changes nothing
plt.show()
In [76]:
# Names of the ten countries with the most deaths.
# df6 is built here so this cell does not depend on a cell further down the
# notebook having been run first (which fails on Restart & Run All).
df6 = (
    df.groupby('Country/Region')['Deaths']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
df6.index
Out[76]:
Index(['US', 'Brazil', 'United Kingdom', 'Mexico', 'Italy', 'India', 'France',
       'Spain', 'Peru', 'Iran'],
      dtype='object', name='Country/Region')
In [80]:
# Pie chart of deaths for the ten countries with the highest death counts.
# Each wedge is that country's share of the top-10 total.
fig, ax = plt.subplots(figsize=(15, 5))
df6 = (
    df.groupby('Country/Region')['Deaths']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
pie_chart = ax.pie(df6, labels=df6.index, autopct='%0.2f%%', startangle=140)
# The plotted column is 'Deaths', so the title names deaths rather than "reporting".
ax.set_title("Top 10 countries by COVID deaths")
# Grid lines and tick queries are not meaningful on a pie chart, so they are omitted.
plt.show()
In [81]:
# Interactive scatter of deaths against confirmed cases, coloured by WHO region.
confirmed_vs_deaths = px.scatter(
    data_frame=df,
    x='Confirmed',
    y='Deaths',
    color='WHO Region',
)
confirmed_vs_deaths
In [84]:
# Preview df again; the derived fatality_rate and recovery_rate columns now appear at the end.
df.head()
Out[84]:
Country/Region Confirmed Deaths Recovered Active New cases New deaths New recovered Deaths / 100 Cases Recovered / 100 Cases Deaths / 100 Recovered Confirmed last week 1 week change 1 week % increase WHO Region fatality_rate recovery_rate
0 Afghanistan 36263 1269 25198 9796 106 10 18 3.50 69.49 5.04 35526 737 2.07 Eastern Mediterranean 3.499435 69.486805
1 Albania 4880 144 2745 1991 117 6 63 2.95 56.25 5.25 4171 709 17.00 Europe 2.950820 56.250000
2 Algeria 27973 1163 18837 7973 616 8 749 4.16 67.34 6.17 23691 4282 18.07 Africa 4.157581 67.339935
3 Andorra 907 52 803 52 10 0 0 5.73 88.53 6.48 884 23 2.60 Europe 5.733186 88.533627
4 Angola 950 41 242 667 18 1 0 4.32 25.47 16.94 749 201 26.84 Africa 4.315789 25.473684
In [97]:
# Four countries with the highest 'Deaths / 100 Cases' value, largest first.
df9 = (
    df.groupby('Country/Region')['Deaths / 100 Cases']
    .sum()
    .sort_values(ascending=False)
    .iloc[:4]
)
# One bar colour per selected country, in the same order as df9.
colors = ['green', 'red', 'violet', 'gold']
df9
Out[97]:
Country/Region
Yemen             28.56
United Kingdom    15.19
Belgium           14.79
Italy             14.26
Name: Deaths / 100 Cases, dtype: float64
In [98]:
# Horizontal bar chart of the four countries with the highest deaths per 100 cases.
fig, ax = plt.subplots()
ax.barh(df9.index, df9, color=colors)
# barh places the first entry at the bottom; flip the axis so the highest value is on top.
ax.invert_yaxis()
ax.set_title("Top 4 countries by deaths per 100 cases")
ax.set_xlabel('Deaths / 100 Cases')
ax.set_ylabel('Country')
plt.show()
In [ ]: